# -*- coding: utf-8 -*-
from tests.base_test import base_test_case

# Module-level logger obtained from the shared test base; not used by any
# live code in the visible part of this file.
logger = base_test_case.get_logger(__name__)
# Directory holding input fixtures. It is combined with "/" further down, so
# it is presumably a pathlib.Path -- confirm against base_test_case.
TEST_DATA_DIR = base_test_case.test_data_dir
# Output directory exposed by the shared test base; not used by any live code
# in the visible part of this file.
OUTPUT_DATA_DIR = base_test_case.output_data_dir

if __name__ == "__main__":
    # Manual run: send one table image plus a text prompt to the
    # PP-DocBee2-3B DocVLM model, then call print() and save_to_json()
    # on every result it yields.

    # Alternative input kept for reference:
    #   str(OUTPUT_DATA_DIR / "25-注会-轻1-财务成本管理[上册](第3章)/page_0.jpg")
    image_path = str(TEST_DATA_DIR / "table.jpg")

    # Prompt text: "recognize the content of this table, output in HTML format".
    # Markdown variant kept for reference:
    #   "识别这份表格的内容, 以markdown格式输出"
    prompt = "识别这份表格的内容, 以HTML格式输出"
    request = {"image": image_path, "query": prompt}

    # Imported inside the guard, so paddleocr is loaded only when this file
    # is executed as a script.
    from paddleocr import DocVLM

    doc_vlm = DocVLM(model_name="PP-DocBee2-3B")

    # NOTE: every result is handed the same path, "./output/res.json".
    for result in doc_vlm.predict(input=request, batch_size=1):
        result.print()
        result.save_to_json("./output/res.json")
